# code to replicate analysis of Ladd and Lenz data
rm(list=ls())
library(foreign)
library(Matching)
library(survey)
library(lattice)
# package that implements entropy balancing
library(ebal)

dat <- read.dta("LaddLenz.dta",convert.factors = F)

# prepare data     
dat <- dat[order(dat$tolabor),]
colnames(dat) <-gsub("copemg92","Prior Coping Mortgage",colnames(dat))
colnames(dat) <-gsub("conservative","Prior Conservative Identification",colnames(dat))
colnames(dat) <-gsub("liberal","Prior Liberal Identification",colnames(dat))
colnames(dat) <-gsub("white","White",colnames(dat))
colnames(dat) <-gsub("wkclass","Working-Class",colnames(dat))
colnames(dat) <-gsub("parent_labor","Parents Voted Labour",colnames(dat))
colnames(dat) <-gsub("f_ideo92","Prior Ideological Moderation",colnames(dat))
colnames(dat) <-gsub("vote_l_92","Prior Labour Vote",colnames(dat))
colnames(dat) <-gsub("vote_c_92","Prior Conservative Vote",colnames(dat))
colnames(dat) <-gsub("vote_lib_92","Prior Liberal Vote",colnames(dat))
colnames(dat) <-gsub("labfel92","Prior Labour Party Support",colnames(dat))
colnames(dat) <-gsub("confel92","Prior Conservative Party Support",colnames(dat))
colnames(dat) <-gsub("know_3","Prior Political Knowledge",colnames(dat))
colnames(dat) <-gsub("TVnewseither","Prior Television Viewer",colnames(dat))
colnames(dat) <-gsub("read_paper","Prior Daily Newspaper Reader",colnames(dat))
colnames(dat) <-gsub("ideo92","Prior Ideology",colnames(dat))
colnames(dat) <-gsub("auth92","Authoritarianism",colnames(dat))
colnames(dat) <-gsub("re75","Earnings 1975",colnames(dat))
colnames(dat) <-gsub("tusa92","Prior Trade Union Member",colnames(dat))
colnames(dat) <-gsub("hedqul92","Prior Education",colnames(dat))
colnames(dat) <-gsub("hhincq92","Prior Income",colnames(dat))
colnames(dat) <-gsub("hedqul92","Prior Education",colnames(dat))
colnames(dat) <-gsub("ragect92","Prior Age",colnames(dat))
colnames(dat) <-gsub("rsex92","Male",colnames(dat))
colnames(dat) <-gsub("hedqul92","Prior Education",colnames(dat))
colnames(dat) <-gsub("occupation2","Profession: Large Employer",colnames(dat))
colnames(dat) <-gsub("occupation3","Profession: Small Employer",colnames(dat))
colnames(dat) <-gsub("occupation4","Profession: Self Employed",colnames(dat))
colnames(dat) <-gsub("occupation5","Profession: Employee",colnames(dat))
colnames(dat) <-gsub("occupation6","Profession: Temporary Worker",colnames(dat))
colnames(dat) <-gsub("occupation7","Profession: Junior",colnames(dat))
colnames(dat) <-gsub("region2","North West",colnames(dat))
colnames(dat) <-gsub("region3","Yorks",colnames(dat))
colnames(dat) <-gsub("region4","West Midlands",colnames(dat))
colnames(dat) <-gsub("region5","East Midlands",colnames(dat))
colnames(dat) <-gsub("region6","East Anglia",colnames(dat))
colnames(dat) <-gsub("region7","SW England",colnames(dat))
colnames(dat) <-gsub("region8","SE England",colnames(dat))
colnames(dat) <-gsub("region9","Greater London",colnames(dat))
colnames(dat) <-gsub("region10","Wales",colnames(dat))
colnames(dat) <-gsub("region11","Scotland",colnames(dat))  
colnames(dat) <-gsub("labor","Prior Labour Identification",colnames(dat))
colnames(dat) <-gsub("toPrior Labour Identification","tolabor",colnames(dat))

# pick covars
covarsname <- colnames(dat)    
X <- dat[,covarsname]
W <- X[,"tolabor"]
Y <- X[,"vote_l_97"]
X <- X[,-c(1:2)]
cvars <- colnames(X)
X <- as.matrix(X)

# balance before matching
bout.nm  <- MatchBalance(W~X,match.out = NULL,ks=FALSE)
bal.nm   <- baltest.collect(matchbal.out=bout.nm ,var.names=colnames(X),after=FALSE)
round(bal.nm,2)

# Maha Dist Matching
mout.maha  <-  Match(Y,W,X,BiasAdjust=F,estimand="ATT",M=1)
summary(mout.maha)
bout.maha  <- MatchBalance(W~X,match.out = mout.maha,ks=FALSE)
bal.maha   <- baltest.collect(matchbal.out=bout.maha ,var.names=colnames(X),after=TRUE)
round(bal.maha,2)

# Genetic Matching
g.weights <- GenMatch(Tr=W, X=X, BalanceMatrix=X, estimand="ATT", M=1,print.level=0)
mout.gm   <-  Match(Y,W,X,BiasAdjust=F,Weight.matrix=g.weights,estimand="ATT",M=1)
summary(mout.gm)
bout.gm <- MatchBalance(W~X,match.out = mout.gm,print.level=0,ks=FALSE)
bal.gm <- baltest.collect(matchbal.out=bout.gm,var.names=colnames(X),after=TRUE)
round(bal.gm,2)

## Logistic PS weighting + matching
PS  <- glm(W~X,family=binomial(link=logit))$fitted
PSM <- PS
PS  <- PS[W==0]
PS  <- PS/(1-PS)

# PS Matching 
mout.psm <-  Match(Y,W,X=PSM,BiasAdjust=F,estimand="ATT",M=1)
summary(mout.psm)
bout.psm <- MatchBalance(W~X,match.out = mout.psm,print.level=0,ks=FALSE)
bal.psm <- baltest.collect(matchbal.out=bout.psm,var.names=colnames(X),after=TRUE)
round(bal.psm,2)

# PS Weighting
bout.psw <- MatchBalance(W~X,weights=c(PS,W[W==1]),ks=FALSE)
bal.psw <- baltest.collect(matchbal.out=bout.psw,var.names=colnames(X),after=FALSE)
round(bal.psw,2)

# Entropy Balancing
out.eb <- ebalance(
              Treatment=W,
              X=X
              )
           
bout.eb <- MatchBalance(W~X,weights=c(out.eb$w,W[W==1]),ks=FALSE)
bal.eb  <- baltest.collect(matchbal.out=bout.eb,var.names=colnames(X),after=F)
round(bal.eb,2)

# Entropy Balancing (with trimmed weights)
out.ebtr <- ebalance.trim(          
               ebalanceobj=out.eb
              )

bout.ebtr <- MatchBalance(W~X,weights=c(out.ebtr$w,W[W==1]),ks=FALSE)
bal.ebtr  <- baltest.collect(matchbal.out=bout.eb,var.names=colnames(X),after=FALSE)
round(bal.ebtr,2)

dat$w   <-  c(out.eb$w,W[W==1])
dat$wtr <-  c(out.ebtr$w,W[W==1])

# Effect Estimates
des <- svydesign(id=~1,weights=~w, data= dat)
mod1 <- svyglm(vote_l_97 ~ tolabor, design = des)
summary(mod1)

des <- svydesign(id=~1,weights=~wtr, data= dat)
mod2 <- svyglm(vote_l_97 ~ tolabor, design = des)
summary(mod2)

# Balance Plots
d  <- rbind(bal.nm,bal.maha,bal.gm,bal.psm,bal.psw,bal.eb)
dn <- rownames(d)
d  <- data.frame(d)
d$vname <- factor(dn,levels = unique(dn)[length(unique(dn)):1], labels = unique(dn)[length(unique(dn)):1])
d$gr <- rep(c("Unadjusted","MahaDist Matching","Genetic Matching","PS Matching","PS Weighting","Entropy Balancing"),each=length(unique(dn)))
d$gr <- factor(d$gr,levels=unique(d$gr)[1:length(unique(d$gr))],labels=unique(d$gr)[1:length(unique(d$gr))])

mypal<-c("black",rep("darkgrey",4),"black")[6:1]
Cex <- Cex2 <- 1
            
# plot with SDs
bplot <- function(x,y,...)
 {
 panel.abline(v=0, lwd = 1 , lty="solid")
 panel.abline(v=c(-.1,.1), lwd = 2 , lty="dotted")
 panel.abline(h=c(1:nrow(d)), lwd = 1 , lty="dashed", col="gray95")
 panel.xyplot(x,y,...)
 }

print(

xyplot(vname~round(sdiff.pooled/100,2),data=d,groups=gr,xlim=c(-.6,.6),
panel = bplot
 ,par.settings = list(superpose.symbol = list(pch = c(15,19,17,18,15,1)[6:1],col=mypal,cex=1))
 ,xlab=list("standardized difference in means",cex=Cex),ylab="",auto.key=T,scales=list(y=list(cex=Cex),x=list(cex=Cex2,at=c(-.5,-.1,0,.1,.5),labels=c("-.5","-.1","0",".1",".5")))
 )
)
# plot with p-values
print(
xyplot(vname~round(T.pval,2),data=d,groups=gr,xlim=c(-.05,1.05),
panel = bplot
 ,par.settings = list(superpose.symbol = list(pch = c(15,19,17,18,15,1)[6:1],col=mypal,cex=1))
 ,xlab=list("p-value: difference of means test",cex=Cex),ylab="",auto.key=T,scales=list(y=list(cex=Cex),x=list(cex=Cex2,at=c(0,.1,.2,.5,1),labels=c("0",".1",".2",".5","1")))
 )
)
# table
latex(round(cbind(bal.nm[,c(1:2)],bal.nm[,4]/100  ,bal.nm[,5:6],
                  bal.eb[,2]   ,bal.eb[,4]/100,bal.eb[,5:6],
                  bal.psw[,2]    ,bal.psw[,4]/100 ,bal.psw[,5:6])
            ,2)
            ,file="")


# sensitivity simulation
datt <- read.dta("endorsementEallprobit.dta",convert.factors = F)
datt <- datt[order(datt$tolabor),]
covarstosample <- colnames(datt)[-c(1:2)] 
datt$w   <-  c(out.eb$w,W[W==1])
des1 <- svydesign(id=~1,weights=~w, data= datt)

sims  <- 1000000
store <- matrix(NA,sims,2)
colnames(store) <- c("EB","OLS")
for(i in 1:sims){
 print(i) 
        ftmp <- "vote_l_97 ~ tolabor"
        bk   <- sample(covarstosample,sample(1:length(covarstosample),1),replace=F)
        ftmp <- as.formula(paste(ftmp, "+",paste(bk,collapse=" + ")))
        modw       <- svyglm(ftmp, design = des1)
        modols <- lm(ftmp, data = datt) 
 store[i,1:2] <-  c(coef(modw)["tolabor"],coef(modols)["tolabor"])
}
# Estimates  
summary(store)
dx1 <- density(store[,2])
# Plot
plot(dx1, type = "l", ylab = "Density", 
         xlab = "Estimated average treatment effect for the treated (US$)",
         xlim = range(c(dx1$x)), 
         ylim = range(c(dx1$y)),main="")
segments(y0=0,y1=46.7,x1=mean(store[,"EB"]),x0=mean(store[,"EB"]),lty="dashed",lwd=2)
arrows(y1=39,x1=.122,y0=41.5,x0=.136,length=.1) 
text(y=42,x=.155,labels="Estimates after Entropy Balancing") 

arrows(y0=16.5,x1=.129,y1=14.1,x0=.14,length=.1) 
text(y=18,x=.153,labels="Estimates before Entropy Balancing")
